## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.3     ✓ purrr   0.3.4
## ✓ tibble  3.1.2     ✓ dplyr   1.0.6
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

1 Getting started

1.1 installing library from gitlab

To use synapsis, you will need the following packages:

  • stats
  • EBImage
  • graphics
  • utils

And to run this tutorial, you will need:

  • tidyverse
  • ggplot2
  • knitr
  • rmarkdown
devtools::install_git('https://gitlab.svi.edu.au/lmcneill/synapsis')

1.2 loading synapsis

library(synapsis)
library(EBImage)
## 
## Attaching package: 'EBImage'
## The following object is masked from 'package:purrr':
## 
##     transpose

1.3 checking documentation

1.4 Data preparation

For the moment, we will use the example images which come with synapsis.

path = paste0(system.file("extdata",package = "synapsis"))

1.5 Quick look at data with EBImage

The example images are from the same slide. One of them “-RGB” is a false colour three channel image. The other three are the single channels corresponding to specific antibodies or stains. For now, we will take a look at the sample image

file_3channel <- paste0(path,"/MLH3rabbit488_SYCP3mouse594_fancm_fvb_x_fancm_bl6_724++_slide01_006-RGB.jpeg")
image_3channel <- readImage(file_3channel)
display(image_3channel)

After a look at the image, we note that SYCP3 and MLH3 antibodies illuminate synaptonemal complexes (red) and foci (green) respectively. DAPI stains cells (blue). In functions, channel1_string refers to the antibody/ identifier for the foci channel (green here). It defaults to *-MLH3. channel2_string is the antibody/ identifier for the synaptonemal complex / dna channel (red here), which defaults to *-SYCP3. channel3_string is optional antibody/ identifier for a third channel. It defaults to *DAPI. We can split these up

r = channel(image_3channel,"r")
g = channel(image_3channel,"g")
b = channel(image_3channel,"b")

Let’s confirm that this is consistent with our filenames.

1.5.1 MLH3 channel

display(g)

Now compare to the file ending in MLH3

file_MLH3 <- paste0(path,"/MLH3rabbit488_SYCP3mouse594_fancm_fvb_x_fancm_bl6_724++_slide01_006-MLH3.jpeg")
image_MLH3 <- readImage(file_MLH3)
display(image_MLH3)

They should be the same.

1.5.2 SYCP3 channel

display(r)

Now compare to the file ending in SYCP3

file_SYCP3 <- paste0(path,"/MLH3rabbit488_SYCP3mouse594_fancm_fvb_x_fancm_bl6_724++_slide01_006-SYCP3.jpeg")
image_SYCP3 <- readImage(file_SYCP3)
display(image_SYCP3)

That is a short introduction to the file naming convention in synapsis. We will go into more detail at the end of the tutorial, particularly what this means for your dataset.

1.5.3 DAPI channel (optional)

And DAPI channel

display(b)

Now compare to the file ending in SYCP3

file_DAPI <- paste0(path,"/MLH3rabbit488_SYCP3mouse594_fancm_fvb_x_fancm_bl6_724++_slide01_006-DAPI.jpeg")
image_DAPI <- readImage(file_DAPI)
display(image_DAPI)

2 Calling functions on data

2.1 Cropping routine

You can type

??auto_crop_fast

There is an annotation setting, which we switch to “on”. max_cell_area and min_cell_area have been calibrated to our data set, where the subject are mouse cells and magnification kept constant. You could run it on the first five images by setting e.g. test_amount = 5. But for now we will just look at the single image.

auto_crop_fast(path, annotation = "on", max_cell_area = 30000, min_cell_area = 7000, test_amount = 1)
## [1] "couldn't crop it since cell is on the edge. Neglected the following mask of a cell candidate:"

## [1] "from the file:"
## [1] "MLH3rabbit488_SYCP3mouse594_fancm_fvb_x_fancm_bl6_724++_slide01_006"

## [1] "I cropped this cell:"

## [1] "using this mask"

## [1] "whose cell number is"
## [1] 2
## [1] "out of"
## [1] 0
## [1] "images, we got"
## [1] 2
## [1] "viable cells"

Here we called path, plus other optional parameters (that would otherwise take on default values). But only path is essential. This is because auto_crop_fast has built-in default values which are assumed when the user doesn’t specify.

A crops folder with three channels per “viable cell” should have been generated inside the folder where these images are kept i.e. in path.

2.2 Getting pachytene

SYCP3_stats <- get_pachytene(path,ecc_thresh = 0.8, area_thresh = 0.04, annotation = "on")
## [1] "decided the following is pachytene"

## [1] "number of cells kept"
## [1] 1

SYCP3_stats is a data frame summarizing some features of the cells classified as pachytene. ## Counting foci

foci_counts <- count_foci(path,offset_factor = 3, brush_size = 3, brush_sigma = 3, annotation = "on",stage = "pachytene")
## [1] "cell counter is"
## [1] 1
## [1] "original images"

## [1] "displaying resulting foci count"
## [1] "Overlay two channels"

## [1] "coincident foci"

## [1] "two channels, only coincident foci"

## [1] "which counts this many foci:"
## [1] 29
## [1] "number of alone foci"
## [1] 51
## [1] "percentage of foci channel coincident:"
## [1] 49.84026

foci_counts is a data frame summarizing some features (including foci counts) of the cells classified as pachytene.

2.3 Distance between foci on SC

df_dist <- measure_distances(path, annotation = "on")
## [1] "looking at cell number:"
## [1] 1
## [1] "foci located at"
## [1] 76.92857 50.85714 70.06667 72.73333
## [1] "total distance is"
## [1] 47.97056

## [1] "with foci locations included (actual)"

## [1] "with foci locations included, on the line"

## [1] "here are the results for this SC"

## [1] "the distance strands measure"
## [1] 16.82843 31.14214
## [1] "distance between foci (in pixels) was"
## [1] 24.89949
## [1] "distance as a fraction of total length is"
## [1] 0.5190578
## [1] "This strand managed to pass through."
## [1] "Check that it walked successfully. Total length in red, distance between two in white, and foci locations in magenta."

3 Bigger data set

Now we will run auto_crop on the bigger data set (silently/ without annotation) to do some significance testing.

start_time <- as.numeric(as.numeric(Sys.time())*1000, digits=15) # place at start
auto_crop_fast(path, annotation = "off", max_cell_area = 30000, min_cell_area = 7000,third_channel = "on")
end_time <- as.numeric(as.numeric(Sys.time())*1000, digits=15)
print(end_time - start_time)

3.1 Counting foci

Now let’s count the foci for each genotype.

SYCP3_stats <- get_pachytene(path,ecc_thresh = 0.8, area_thresh = 0.04, annotation = "off")
foci_counts <- count_foci(path,offset_factor = 3, brush_size = 3, brush_sigma = 3, annotation = "off",stage = "pachytene")

Instead of evaluating, we will load the resulting dataframe from executing the above chunk on a large dataset. In this example data set (images taken by Vanessa Tsui in 2020 at SVI), there are 174 images where auto_crop_fast yields 243 crops. This took 1.2 minutes.

4 Statistics

4.1 loading files

demo_path <-paste0(system.file("extdata",package = "synapsis"),"/")
foci_count_path <- paste0(demo_path, "df_foci_count.csv")
foci_counts <- read.csv(foci_count_path)

4.2 Some statistics

### comparing groups
counts <- foci_counts$foci_count
counts_mod <- foci_counts[as.numeric(foci_counts$foci_count) > 0,]
counts_mod <- foci_counts[as.numeric(foci_counts$foci_count) < 40,]
#counts_mod <- counts_mod[as.numeric(counts_mod$percent_on) > 0.55,]
# counts_mod <- counts_mod[as.numeric(counts_mod$sd_foci) <20,]
counts <- counts_mod$foci_count
counts_KO <- counts_mod[counts_mod$genotype == "Fancm-/-",]
counts_WT <- counts_mod[counts_mod$genotype == "Fancm+/+",]

count_KO <- counts_KO$foci_count
count_WT <- counts_WT$foci_count
mean(as.numeric(count_KO), na.rm= TRUE)
## [1] 29.80851
mean(as.numeric(count_WT), na.rm= TRUE)
## [1] 20.84848
sd(as.numeric(count_KO), na.rm= TRUE)
## [1] 5.651619
sd(as.numeric(count_WT), na.rm= TRUE)
## [1] 10.54787
c1 <- rgb(173,216,230,max = 255, alpha = 140, names = "lt.blue")
c4 <- rgb(255,180,50, max = 255, alpha = 120, names = "lt.orange")
A <- hist(as.numeric(count_WT),plot = FALSE)
B <-  hist(as.numeric(count_KO), plot = FALSE )
plot(A,ylim = c(0,20),  main = "Pachytene", col = c4, xlab = "foci count per cell") 
plot(B, col = c1, add = TRUE) 

4.3 comparison testing

4.3.1 anova test

## anova test
counts_mod$group <- factor(counts_mod$genotype, c("Fancm-/-", "Fancm+/+"))
outfit <- lm(foci_count ~ genotype, data=counts_mod)
outfit
## 
## Call:
## lm(formula = foci_count ~ genotype, data = counts_mod)
## 
## Coefficients:
##      (Intercept)  genotypeFancm+/+  
##            29.81             -8.96
#df.residual(outfit)
#sigma(outfit)
#model.matrix(outfit)
outfit0 <- lm(foci_count ~ 1, data=counts_mod)
anova(outfit0, outfit)
## Analysis of Variance Table
## 
## Model 1: foci_count ~ 1
## Model 2: foci_count ~ genotype
##   Res.Df    RSS Df Sum of Sq      F    Pr(>F)    
## 1     79 6586.0                                  
## 2     78 5029.5  1    1556.5 24.138 4.841e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

4.3.2 t test

t.test(as.numeric(count_KO),as.numeric(count_WT))
## 
##  Welch Two Sample t-test
## 
## data:  as.numeric(count_KO) and as.numeric(count_WT)
## t = 4.4517, df = 44.931, p-value = 5.577e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   4.906032 13.014020
## sample estimates:
## mean of x mean of y 
##  29.80851  20.84848

now that we have a p value we can paste this on the histogram

c1 <- rgb(173,216,230,max = 255, alpha = 140, names = "lt.blue")
c4 <- rgb(255,180,50, max = 255, alpha = 120, names = "lt.orange")
A <- hist(as.numeric(count_WT),plot = FALSE)
B <-  hist(as.numeric(count_KO), plot = FALSE )
plot(A,ylim = c(0,20),  main = "Pachytene", col = c4, xlab = "foci count per cell") 
text(x = 10, y = 15, label = "anova p value = 0.01*", col = "black", cex = 1)
plot(B, col = c1, add = TRUE) 

4.3.3 Boxplot

library(tidyverse)
counts_mod %>% 
  ggplot(aes(x=genotype, y=as.numeric(foci_count), fill = genotype)) + 
  geom_boxplot(width=0.5,lwd=1.5) + 
  geom_jitter(width=0.15) + 
  labs(subtitle="MLH3 foci counts")

4.4 Measuring distances

We can also get some stats on the distances measured.

distances_path <- paste0(demo_path, "df_distances.csv")
df_dist <- read.csv(distances_path)
pass_only <- df_dist[df_dist$pass_fail == "pass",]
distances <- pass_only$fractional_distance
distances_KO <- pass_only[pass_only$genotype == "Fancm-/-",]
distances_WT <- pass_only[pass_only$genotype == "Fancm+/+",]

distance_KO <- distances_KO$fractional_distance
distance_WT <- distances_WT$fractional_distance
mean(as.numeric(distance_KO), na.rm= TRUE)
## [1] 0.4400367
mean(as.numeric(distance_WT), na.rm= TRUE)
## [1] 0.4232971
sd(as.numeric(distance_KO), na.rm= TRUE)
## [1] 0.160769
sd(as.numeric(distance_WT), na.rm= TRUE)
## [1] 0.1778392
c1 <- rgb(173,216,230,max = 255, alpha = 140, names = "lt.blue")
c4 <- rgb(255,180,50, max = 255, alpha = 120, names = "lt.orange")
A <- hist(as.numeric(distance_WT),plot = FALSE)
B <-  hist(as.numeric(distance_KO), plot = FALSE )
plot(A,ylim = c(0,9),xlim = c(0,1),  main = "Pachytene", col = c4, xlab = "foci distance as fraction of total length") 
#text(x = 0.2, y = 7, label = "anova p value 0.9 (NS)", col = "black", cex = 1)
plot(B, col = c1, add = TRUE) 

t.test(as.numeric(distance_KO),as.numeric(distance_WT))
## 
##  Welch Two Sample t-test
## 
## data:  as.numeric(distance_KO) and as.numeric(distance_WT)
## t = 0.36915, df = 46.189, p-value = 0.7137
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.07452709  0.10800626
## sample estimates:
## mean of x mean of y 
## 0.4400367 0.4232971

5 Running on your own data

5.1 .nd2 files

5.2 3 channel / false colour jpeg, tiff or pngs